---
title: "R_SM6"
author: "Loreto Cox & Carmen Le Foulon"
date: "2024-01-27"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(readxl)
library(haven)

```

# Info
Replication: Table 4, Figures SM8 A & B

# Data

District magnitude data come from https://datafinder.qog.gu.se/dataset/gol and http://mattgolder.com/elections

```{r}
# Load the district magnitude data set (Stata format)
mag <- read_dta("data/mag.dta")

# Extracting election year: the last four characters of the date field,
# converted from text to a number
date_len <- nchar(mag$date)
mag$year <- as.numeric(substr(mag$date, date_len - 3, date_len))
```

Data from IDEA, containing invalid voting (note that IDEA's merged data contained more missing data).
Available at https://www.idea.int/data-tools/data/voter-turnout-database; downloaded on November 30, 2022.
```{r}

# Read IDEA's voter turnout database
idea <- read_excel("data/idea_turnout.xls")

# Renaming columns: print the names, then rename the first and third ones
colnames(idea)
colnames(idea)[1] <- "country"
colnames(idea)[3] <- "year"

## To extract IDEA's country codes
# Keep the first and third columns of the code file and drop repeated rows
code <- read_excel("data/idea_country_code.xls")
code <- code[, c(1, 3)]
colnames(code) <- c("country", "country_code")
code <- unique(code)

# Merging IDEA's database with codes. The key is spelled out so the join
# does not silently depend on whichever column names the two tables share.
idea <- left_join(idea, code, by = "country")

# Harmonising country names in both databases
# Diagnostics: how many names match, and which ones do not
table(mag$country)
table(idea$country)
sum(unique(mag$country) %in% unique(idea$country)) ### name of country provides best match
sum(unique(idea$country) %in% unique(mag$country))
unique(mag$country)[!(unique(mag$country) %in% unique(idea$country))]
unique(idea$country)[!(unique(idea$country) %in% unique(mag$country))]

# Removing one trailing space from country names. Vectorised replacement
# for a row-by-row loop; the pattern matches exactly one space at the end.
mag$country <- sub(" $", "", mag$country)

# Arranging spelling differences: name in `mag` -> name used by IDEA
country_fixes <- c(
  "Argentinia" = "Argentina",
  "United States of America" = "United States",
  "Congo" = "Congo, Democratic Republic of",
  "Antigua & Barbuda" = "Antigua and Barbuda",
  "Bosnia" = "Bosnia and Herzegovina",
  "Cape Verde" = "Cabo Verde",
  "Greek Cyprus" = "Cyprus",
  "Czechoslovakia" = "Czech Republic",
  "East Timor" = "Timor-Leste",
  "Republic of Fiji" = "Fiji",
  "Micronesia; Federated States of" = "Micronesia, Federated States of",
  "Republic of the Gambia" = "Gambia",
  "Republic of Guinea-Bissau" = "Guinea-Bissau",
  "Guinea Bissau" = "Guinea-Bissau",
  "Republic of Guatemala" = "Guatemala",
  "Moldova" = "Moldova, Republic of",
  "Republic of Serbia" = "Serbia",
  "Russia" = "Russian Federation",
  "The Co-operative Republic of Guyana" = "Guyana",
  "South Korea" = "Korea, Republic of",
  "Republic of Kosovo" = "Kosovo",
  "St.Vincent and the Grenadines" = "Saint Vincent and The Grenadines",
  "Saint Vincent and the Grenadines" = "Saint Vincent and The Grenadines",
  "Laos PDR" = "Lao People's Dem. Republic",
  "St. Lucia" = "Saint Lucia",
  "St. Kitts and Nevis" = "Saint Kitts and Nevis"
)
# %in% is FALSE for missing names, so NA countries are left untouched
needs_fix <- mag$country %in% names(country_fixes)
mag$country[needs_fix] <- unname(country_fixes[mag$country[needs_fix]])

## Merging both databases (mag and idea)
# Selecting variables of interest and dropping repeated rows
mag2 <- unique(mag[, c("country", "year", "tier1_avemag")])
# -99 is recoded to missing
mag2$tier1_avemag[mag2$tier1_avemag == -99] <- NA
# Magnitudes above 50 are excluded (cases with national districts)
mag2$tier1_avemag[mag2$tier1_avemag > 50] <- NA
mag2 <- mag2[!is.na(mag2$tier1_avemag), ]

# NOTE(review): if a country-year appears in mag2 with more than one
# magnitude value, this join repeats the matching IDEA rows -- confirm
# that country-year is unique in mag2.
idea <- left_join(idea, mag2, by = c("country", "year"))

```

# Analysis

```{r}
# Numeric versions of IDEA's turnout and invalid-vote columns
idea$turnout <- as.numeric(idea$`Voter Turnout`)
idea$invalid <- as.numeric(idea$`Invalid votes`)

# Removing EU elections. which() drops rows whose comparison is NA instead
# of turning them into all-NA placeholder rows.
idea <- idea[which(idea$`Election type` != "EU Parliament"), ]

## Creating dummy for concurrent presidential and parliamentary election
# Election ID: country and year pasted together
idea$election <- paste(idea$country, idea$year)
# Election IDs of presidential/parliamentary elections
unique_pres <- unique(idea$election[idea$`Election type` %in% "Presidential"])
unique_par <- unique(idea$election[idea$`Election type` %in% "Parliamentary"])
# Election IDs that appear for both types (same country and year)
conc <- unique_pres[unique_pres %in% unique_par]
# Pasting this variable in the database
idea$concur <- idea$election %in% conc

# Subsetting to concurrent elections with voluntary voting
idea <- idea[which(idea$concur & idea$`Compulsory voting` == "No"), ]

# Checking there is data for both presidential and legislative elections
# in concurrent elections: a row counts as complete when both the invalid
# vote and the district magnitude are observed
has_data <- !is.na(idea$invalid) & !is.na(idea$tier1_avemag)
full_data_pres <- unique(
  idea$election[idea$`Election type` %in% "Presidential" & has_data]
) # full data for pres
full_data_par <- unique(
  idea$election[idea$`Election type` %in% "Parliamentary" & has_data]
) # full data for par
# Subsetting to observations with full data for both
idea <- idea[
  idea$election %in% full_data_pres & idea$election %in% full_data_par,
]

# Per-type subsets used below (all years, and 1990 onwards)
pres_all <- idea[idea$`Election type` %in% "Presidential", ]
parl_all <- idea[idea$`Election type` %in% "Parliamentary", ]
pres_1990 <- pres_all[which(pres_all$year > 1989), ]
parl_1990 <- parl_all[which(parl_all$year > 1989), ]

# Number of observations
sum(!is.na(pres_all$tier1_avemag) & !is.na(pres_all$invalid))
sum(!is.na(parl_all$tier1_avemag) & !is.na(parl_all$invalid))

# correlations
cor.test(pres_all$tier1_avemag, pres_all$invalid)
cor.test(parl_all$tier1_avemag, parl_all$invalid)

# in logs (Table 4, row 1)
cor.test(log(pres_all$tier1_avemag), pres_all$invalid)
cor.test(log(parl_all$tier1_avemag), parl_all$invalid)

## And since 1990
# Number of observations
sum(!is.na(pres_1990$tier1_avemag) & !is.na(pres_1990$invalid))
sum(!is.na(parl_1990$tier1_avemag) & !is.na(parl_1990$invalid))

# correlations
cor.test(pres_1990$tier1_avemag, pres_1990$invalid)
cor.test(parl_1990$tier1_avemag, parl_1990$invalid)

# in logs (Table 4, row 2)
cor.test(log(pres_1990$tier1_avemag), pres_1990$invalid)
cor.test(log(parl_1990$tier1_avemag), parl_1990$invalid)

```

# Figures

## Figure SM8

### All

```{r}
# Masks for the two election types
leg_rows_all <- idea$`Election type` == "Parliamentary"
pres_rows_all <- idea$`Election type` == "Presidential"

# Log district magnitude (x) and invalid voting (y) for each type
leg_x_all <- log(idea$tier1_avemag[leg_rows_all])
leg_y_all <- idea$invalid[leg_rows_all]
pres_x_all <- log(idea$tier1_avemag[pres_rows_all])
pres_y_all <- idea$invalid[pres_rows_all]

# Legislative points; the default x axis is suppressed so ticks can be
# labelled in magnitude units rather than logs
plot(
  leg_x_all, leg_y_all,
  pch = 16, cex = .5, col = "blue", ylim = c(0, 32),
  xlab = "Average district magnitude", ylab = "Invalid voting",
  main = "All observations", xaxt = "n"
)
axis(1, at = log(c(1:22)), labels = c(1:22))
# Fitted line for legislative elections
abline(lm(leg_y_all ~ leg_x_all), col = "blue", lty = 1)
# Presidential points and fitted line
points(pres_x_all, pres_y_all, pch = 2, cex = .5, col = "red")
abline(lm(pres_y_all ~ pres_x_all), col = "red", lty = 2)
legend(
  "topright", c("Legislative", "Presidential"),
  pch = c(16, 2), col = c("blue", "red"), lty = c(1, 2)
)

```

### Since 1990

```{r}
# Masks for the two election types, restricted to years after 1989
leg_rows_1990 <- idea$`Election type` == "Parliamentary" & idea$year > 1989
pres_rows_1990 <- idea$`Election type` == "Presidential" & idea$year > 1989

# Log district magnitude (x) and invalid voting (y) for each type
leg_x_1990 <- log(idea$tier1_avemag[leg_rows_1990])
leg_y_1990 <- idea$invalid[leg_rows_1990]
pres_x_1990 <- log(idea$tier1_avemag[pres_rows_1990])
pres_y_1990 <- idea$invalid[pres_rows_1990]

# Legislative points; the default x axis is suppressed so ticks can be
# labelled in magnitude units rather than logs
plot(
  leg_x_1990, leg_y_1990,
  pch = 16, cex = .5, col = "blue", ylim = c(0, 32),
  xlab = "Average district magnitude", ylab = "Invalid voting",
  main = "Since 1990", xaxt = "n"
)
axis(1, at = log(c(1:22)), labels = c(1:22))
# Fitted line for legislative elections
abline(lm(leg_y_1990 ~ leg_x_1990), col = "blue", lty = 1)
# Presidential points and fitted line
points(pres_x_1990, pres_y_1990, pch = 2, cex = .5, col = "red")
abline(lm(pres_y_1990 ~ pres_x_1990), col = "red", lty = 2)
legend(
  "topright", c("Legislative", "Presidential"),
  pch = c(16, 2), col = c("blue", "red"), lty = c(1, 2)
)

```

